package com.xiaohu.cdh

import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * Spark SQL job that prints, for every class, the number of distinct student ids.
 *
 * The job reads a comma-separated student file with the columns
 * `id, name, age, gender, clazz`, groups the rows by `clazz`, counts the
 * distinct `id` values in each group and prints the result to stdout.
 *
 * No master URL is configured here, so it has to come from the launch
 * environment (presumably `spark-submit` — confirm against the deployment).
 */
object Demo01ClazzCnt {

  /**
   * Entry point of the job.
   *
   * @param args command-line arguments; not used by this job
   */
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession
      .builder()
      .appName("Demo01ClazzCnt")
      // Use 2 shuffle partitions for the groupBy below.
      .config("spark.sql.shuffle.partitions", "2")
      .getOrCreate()

    // Run the job inside try/finally so the session is always released,
    // including when reading the input or running the aggregation fails.
    try {
      // An explicit schema is supplied, so the column names and types come
      // from this string rather than from the file contents.
      val stuDF: DataFrame = spark
        .read
        .format("csv")
        .option("sep", ",")
        .schema("id String,name String,age Int,gender String,clazz String")
        .load("/data/student/students.txt")

      // Brings the $"col" column interpolator and the SQL functions into scope.
      import spark.implicits._
      import org.apache.spark.sql.functions._

      // One output row per class: (clazz, cnt), where cnt is the number of
      // distinct ids in that class. Prints up to 50 rows.
      stuDF.groupBy($"clazz")
        .agg(countDistinct($"id") as "cnt")
        .show(50)
    } finally {
      spark.stop()
    }

  }

}
